import numpy as np
import pandas as pd
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from pathlib import Path
from sklearn.neural_network import MLPRegressor

# Path of an output CSV. Only its parent directory is created in this section;
# nothing visible here writes the file itself.
filepath = Path('folder/subfolder/out.csv')
filepath.parent.mkdir(parents=True, exist_ok=True)

# Load the data set and derive column groups from its header.
mill = pd.read_feather("feather.feather")
scenario = mill.columns[1:7]  # columns at positions 1..6
measure = mill.columns.symmetric_difference(scenario)
X = mill.columns.symmetric_difference(["VB", "RUL", "case"])
# Formula string with VB on the left and every label in X on the right.
f = "VB ~ " + ' + '.join(X)

# Empty accumulator frames.
result = pd.DataFrame(columns=['iteration', 'method', 'rmse', 'rrse'])
performance = pd.DataFrame(columns=['method', 'meanRMSE', 'meanRRSE'])
error = pd.DataFrame(columns=['iteration', 'case', 'run', 'Taylor', 'NN'])

# Case 1 is excluded from everything that follows.
mill = mill[mill["case"] != 1]

# One row per distinct (case, DOC, feed, material1) combination, then split
# by the material indicator.
case = mill.loc[:, ['case', 'DOC', 'feed', 'material1']].drop_duplicates()
case_m1 = case.loc[case['material1'] == 1]
case_m2 = case.loc[case['material1'] == 0]
# Number of training cases drawn per material in each iteration.
n_case_train_for_m = 6

def _pick_case(candidates, reason):
    """Draw one case id uniformly at random from ``candidates["case"]``.

    Raises:
        ValueError: if ``candidates`` is empty. The previous rejection-sampling
            loops would spin forever in that situation.
    """
    if candidates.empty:
        raise ValueError(f"cannot sample a training case: {reason}")
    return candidates["case"].sample(1).iloc[0]


def _sample_training_rows(data, material_cases, n_cases):
    """Return the rows of ``data`` belonging to a random set of training cases.

    Selection order:
      1. one case drawn freely from ``material_cases``;
      2. three more cases, each one bringing a (DOC, feed) pair that is not yet
         present in the rows selected so far;
      3. ``n_cases - 4`` further cases that have not been selected yet.
    """
    # Rows with case == 0 seed the frame (this keeps the columns of ``data``).
    rows = data[data["case"] == 0]
    first = _pick_case(material_cases, "the material has no cases")
    rows = pd.concat([rows, data[data["case"] == first]])

    for _ in range(3):
        used = rows[["DOC", "feed"]].drop_duplicates()
        clashing = pd.merge(
            material_cases[["case", "DOC", "feed"]], used,
            how="inner", on=["DOC", "feed"],
        )["case"]
        fresh = material_cases[~material_cases["case"].isin(clashing)]
        chosen = _pick_case(fresh, "every (DOC, feed) pair is already used")
        rows = pd.concat([rows, data[data["case"] == chosen]])

    for _ in range(max(n_cases - 4, 0)):
        unseen = material_cases[~material_cases["case"].isin(rows["case"])]
        chosen = _pick_case(unseen, "all cases of the material are already used")
        rows = pd.concat([rows, data[data["case"] == chosen]])

    return rows


def _fit_taylor(rows):
    """Fit ln(time) = ln(k) + s*ln(feed) + w*ln(DOC) + z*ln(VB) by OLS.

    Rows with VB == 0 or time == 0 are dropped before taking logarithms.
    Returns the fitted parameter Series; it is read by the labels
    'Intercept' (= ln k), 'feed' (= s), 'DOC' (= w) and 'VB' (= z).
    """
    usable = rows[(rows["VB"] != 0) & (rows["time"] != 0)]
    logged = np.log(usable[["VB", "time", "DOC", "feed"]])
    return sm.ols(formula="time ~ feed + DOC + VB", data=logged).fit().params


def _taylor_vb(rows, params_m1, params_m2):
    """Solve the fitted log-linear relation for VB, one estimate per row.

    Rows with material1 == 1 use ``params_m1``; all other rows use ``params_m2``.
    The intercept is used directly as ln(k), so no exp/log round trip through a
    separately stored ``k`` is needed.
    """
    is_m1 = rows["material1"].to_numpy() == 1

    def pick(label):
        return np.where(is_m1, params_m1[label], params_m2[label])

    log_time = np.log(rows["time"].to_numpy(dtype=float))
    log_feed = np.log(rows["feed"].to_numpy(dtype=float))
    log_doc = np.log(rows["DOC"].to_numpy(dtype=float))
    exponent = (
        log_time - pick("Intercept") - pick("feed") * log_feed - pick("DOC") * log_doc
    ) / pick("VB")
    return np.exp(exponent)


def _append_score(scores, iteration, method, actual, predicted):
    """Return ``scores`` with one extra row holding RMSE and RRSE for ``method``.

    Both inputs are converted to plain arrays first so the errors are computed
    position by position, never by pandas index alignment.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    rrse = np.sqrt(
        np.sum(np.square(actual - predicted))
        / np.sum(np.square(actual - np.mean(actual)))
    )
    # mean_squared_error returns the MSE; take the root so that the 'rmse'
    # column really holds an RMSE.
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    row = pd.DataFrame(
        {'iteration': iteration, 'method': method, 'rmse': rmse, 'rrse': rrse},
        index=[len(scores)],
    )
    return pd.concat([scores, row])


# Ten random train/test splits. Each split scores three estimators of VB on the
# held-out cases: the Taylor regression, an MLP regressor, and a per-run
# weighted blend of the two ("Hybrid").
for k in range(0, 10):
    train_m1 = _sample_training_rows(mill, case_m1, n_case_train_for_m)
    train_m2 = _sample_training_rows(mill, case_m2, n_case_train_for_m)

    train = pd.concat([train_m1, train_m2])
    cases = train["case"].unique()
    # Every case that was not drawn for training is used for testing.
    test = mill[~mill["case"].isin(cases)]
    actual_test = test["VB"].to_numpy(dtype=float)

    # TAYLOR ------------------------------------------------------------------
    coef_m1 = _fit_taylor(train_m1)
    coef_m2 = _fit_taylor(train_m2)

    est_VB_T = _taylor_vb(test, coef_m1, coef_m2)
    result = _append_score(result, k, "Taylor", actual_test, est_VB_T)

    # Taylor estimates on the training rows, needed for the blend weights.
    est_VB_T_train = pd.DataFrame({
        'case': train['case'].to_numpy(),
        'run': train['run'].to_numpy(),
        'VB': _taylor_vb(train, coef_m1, coef_m2),
    })

    # neural network (NN) -----------------------------------------------------
    clf = MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=80, alpha=0.001, solver='lbfgs', verbose=20,
                       random_state=10, tol=0.001)

    y = np.asarray(train['VB'], dtype="float")

    # NOTE(review): the features are every column except 'VB', so 'case' (and
    # 'RUL', if that column exists) are fed to the network - confirm that this
    # is intended.
    x = train.drop('VB', axis=1)
    clf.fit(x, y)
    est_VB_NN = clf.predict(test.drop('VB', axis=1))
    result = _append_score(result, k, "NN", actual_test, est_VB_NN)

    # Built from plain arrays so each prediction stays on its own row; joining
    # a fresh 0..n-1 Series to ``train`` columns would align on index labels.
    est_VB_NN_train = pd.DataFrame({
        'case': train['case'].to_numpy(),
        'run': train['run'].to_numpy(),
        'VB': clf.predict(x),
    })

    # Hybrid ------------------------------------------------------------------
    # Compute the blend weight per run: for each run present in the training
    # rows, pick the p in {0.00, 0.01, ..., 1.00} that minimises the error of
    # p * Taylor + (1 - p) * NN, the same blend that is applied to the test rows.
    n_runs = int(mill["run"].max()) + 1
    blend_grid = np.arange(0, 101) / 100
    run_of_train = train["run"].to_numpy()
    taylor_train = est_VB_T_train["VB"].to_numpy(dtype=float)
    nn_train = est_VB_NN_train["VB"].to_numpy(dtype=float)

    run_weights = np.zeros(n_runs)
    for r in range(n_runs):
        in_run = run_of_train == r
        if in_run.any():
            best_mse = np.inf
            for p in blend_grid:
                blended = p * taylor_train[in_run] + (1 - p) * nn_train[in_run]
                mse = mean_squared_error(y[in_run], blended)
                # '<=' keeps the largest p among ties.
                if mse <= best_mse:
                    best_mse = mse
                    run_weights[r] = p
        elif r > 0:
            # Runs absent from the training rows reuse the previous run's weight.
            run_weights[r] = run_weights[r - 1]

    weights = pd.DataFrame({'run': np.arange(n_runs), 'w': run_weights})

    # Blend the two test estimates with the weight of each row's run.
    weight_by_run = pd.Series(run_weights, index=np.arange(n_runs))
    p_test = test["run"].map(weight_by_run).to_numpy(dtype=float)
    est_VB_h = pd.DataFrame({
        'case': test['case'].to_numpy(),
        'run': test['run'].to_numpy(),
        'VB': p_test * est_VB_T + (1 - p_test) * est_VB_NN,
    })

    print('est_VB_T')
    print(est_VB_T)
    print('est_VB_NN')
    print(est_VB_NN)
    print('est_VB_H')
    print(est_VB_h)

    result = _append_score(result, k, "Hybrid", actual_test, est_VB_h["VB"].to_numpy())
# NOTE(review): the 'rmse' column is multiplied by 10 before printing and
# plotting; the reason is not visible in this file - confirm the intended unit.
result["rmse"] = result["rmse"] * 10
print(result)

# (method label, matplotlib format string) in the order they are reported.
method_styles = (("Taylor", 'ro'), ("NN", 'bo'), ("Hybrid", 'go'))

for method_name, _ in method_styles:
    print(result[result["method"] == method_name])

# One marker per iteration and method.
iterations = result["iteration"].unique()
for method_name, marker in method_styles:
    method_rmse = result[result["method"] == method_name]["rmse"]
    plt.plot(iterations, method_rmse, marker, label=method_name)

plt.xlabel("kth iteration")
plt.ylabel("RMSE")
plt.legend(numpoints=1)
plt.title('Overall results')
plt.show()
